3. Les parlementaires sur le réseau social Twitter
3.1. Analyse des données textuelles
import pandas as pd
from lib.figures import *
from lib.constant import *
from lib.utils import *
from bokeh.io import output_notebook
# Route Bokeh output to the notebook and suppress the BokehJS loading banner.
output_notebook(hide_banner=True)

# Tweet table used by the text-analysis figures below.
# NOTE(review): judging by the file name this covers Feb-June 2023 tweets on
# the pension reform -- confirm against the data pipeline.
twitter_df = pd.read_parquet(
    path='data/twitter_fev_to_juin_2023_retraite_data.parquet'
)

# Last expression of the cell: the returned object is what the notebook renders.
intervention_frequency_per_group(twitter_df)
Show code cell source
from bokeh.models import TabPanel, Tabs

# Occurrence counts computed by the project helper.
# NOTE(review): top_n=10 presumably keeps the ten most frequent terms -- confirm in lib.
df = getCountDataframe(twitter_df, top_n=10)

# One tab per n-gram length; the key is matched against the `num_words` column.
tab_titles = {1: "1 mot", 2: "2 mots", 3: "3 mots"}
panels = [
    TabPanel(
        child=occurrenceDistributionPerGroupePolitique(df[df.num_words == n_words]),
        title=title,
    )
    for n_words, title in tab_titles.items()
]
show(Tabs(tabs=panels, sizing_mode="stretch_width"))
from bokeh.models import TabPanel, Tabs

# Recomputed here so this cell can be run without the previous one.
# NOTE(review): top_n=10 presumably keeps the ten most frequent terms -- confirm in lib.
df = getCountDataframe(twitter_df, top_n=10)

# One tab per n-gram length; the key is matched against the `num_words` column.
tab_titles = {1: "1 mot", 2: "2 mots", 3: "3 mots"}
panels = [
    TabPanel(
        child=occurrenceDistributionPerPolitiqueOrientation(df[df.num_words == n_words]),
        title=title,
    )
    for n_words, title in tab_titles.items()
]
show(Tabs(tabs=panels, sizing_mode="stretch_width"))
3.2. Network Data
# Reload the tweet table from disk so this section starts from the unfiltered data.
twitter_df = pd.read_parquet('data/twitter_fev_to_juin_2023_retraite_data.parquet')

# Keep retweets only. `retweet_id == None` is evaluated element-wise by pandas
# and is False for every row (missing values never compare equal), so the
# previous `~(... == None)` mask kept everything. `notna()` is the actual
# missing-value test.
twitter_df = twitter_df[twitter_df.retweet_id.notna()]

# Keep rows whose `is_keywords` flag is set.
# NOTE(review): presumably "at least one tracked keyword was detected" -- confirm.
# `.copy()` makes the result an independent frame so that adding a column
# below does not trigger SettingWithCopyWarning.
twitter_df = twitter_df[twitter_df.is_keywords].copy()

# Deputy reference table; the `slug` -> `twitter` pairs give the Twitter
# handle for each `username` found in the tweet table.
deputy_df = pd.read_csv("data/2022_now_deputy.csv", sep=";")
slug2twitterat = dict(deputy_df[["slug", "twitter"]].values)

# Usernames absent from the reference table map to NaN.
twitter_df["twitter_at"] = twitter_df.username.map(slug2twitterat)

# Last expression of the cell: preview of the first two rows.
twitter_df.head(2)
| username | full_text | date | in_reply_to_screen_name | in_reply_to_status_id_str | in_reply_to_user_id_str | retweet_id | retweet_username | retweet_user_id | is_quote_status | quoted_status_id_str | groupe_sigle | hashtag | is_hashtag | lemmatization | keywords_detected | is_keywords | twitter_at | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 464 | jean-luc-fugit | RT : La réforme des retraites soulève la quest... | 2023-02-01 00:00:59+00:00 | None | None | None | 1620569649967681542 | StanGuerini | 1911591212 | False | None | REN | [#retraites] | True | rt : le réforme de retraite soulever le questi... | [retraite, retrait, réforme, réforme de retrai... | True | Jean_LucFUGIT |
| 453 | laure-lavalette | RT : . (RN) interpelle (LFI) : "On ne comprend... | 2023-02-01 00:19:24+00:00 | None | None | None | 1620535420223213569 | LCP | 85362553 | False | None | RN | [] | False | rt : . ( RN ) interpelle ( LFI ) : " on ne com... | [obstruction, majorité] | True | LaureLavalette |
# Attach to every deputy the colour registered for their political group.
deputy_df["color"] = deputy_df["groupe_sigle"].map(gp_politique_color)

# Lookup tables keyed by the `twitter` column of the deputy table:
# handle -> colour and handle -> group acronym.
dep2color = dict(deputy_df[["twitter", "color"]].values)
dep2sigle = dict(deputy_df[["twitter", "groupe_sigle"]].values)
def color(node):
    """Return the colour stored for *node* in ``dep2color``.

    Nodes that are not keys of ``dep2color`` get the fallback colour "#aaa".
    """
    return dep2color.get(node, "#aaa")
def gp_legend(node):
    """Return the group acronym stored for *node* in ``dep2sigle``.

    Nodes that are not keys of ``dep2sigle`` are labelled "NA".
    """
    return dep2sigle.get(node, "NA")
import networkx as nx

# Edge list source: who retweeted (twitter_at) -> who was retweeted (retweet_username).
graph_df = twitter_df["twitter_at retweet_username groupe_sigle".split()]

# Drop rows with a missing endpoint BEFORE casting to str. Once cast, missing
# values become the literal strings "None"/"nan", `isna()` no longer sees them,
# and they end up as bogus nodes in the graph. (The previous
# `graph_df[~graph_df.isna()]` was also a DataFrame-shaped mask, which never
# removes rows.)
graph_df = graph_df.dropna(subset=["twitter_at", "retweet_username"]).astype(str)

# Optional filter, kept disabled: restrict targets to accounts that are deputies.
#graph_df = graph_df[graph_df.retweet_username.isin(deputy_df.twitter.values)]

# One row per (source, target) pair; `size` is the number of retweets and
# becomes the edge attribute.
graph_df = graph_df.groupby("twitter_at retweet_username".split(), as_index=False).size()
G = nx.from_pandas_edgelist(
    graph_df,
    source="twitter_at",
    target="retweet_username",
    edge_attr="size",
    create_using=nx.DiGraph,
)

# Prune weakly connected nodes (fewer than 4 incident edges, in + out).
# NOTE(review): degrees are re-evaluated after each removal, so the outcome
# depends on node iteration order; this is neither a one-shot threshold nor a
# true k-core. Behaviour kept as is -- confirm which one is intended.
for node in list(G.nodes()):
    if G.degree(node) < 4:
        G.remove_node(node)

# Safety net for data where a missing value was already stored as the string "None".
if "None" in G:
    G.remove_node("None")
from ipysigma import Sigma, SigmaGrid

# Per-node centrality scores, each used to size the nodes of one view.
betweeness = nx.betweenness_centrality(G)
page_rank = nx.pagerank(G)

# Options that are identical in the three views.
shared_view_options = dict(
    node_color=dep2sigle,
    default_node_border_color="#ffffff",
    node_color_palette=gp_politique_color,
    start_layout=10,
    default_edge_type="curve",
    label_font="Arial",
)

# Three views of the same graph laid out on two columns. The chained
# expression is the last statement of the cell, so the grid is what the
# notebook renders.
SigmaGrid(G, hide_search=False, columns=2).add(
    name="In Degree",
    node_size=G.in_degree,
    node_label_size=G.degree,
    node_size_range=[3, 20],
    edge_size_range=[0.1, 1],
    **shared_view_options,
).add(
    name="Betweeness",
    node_size=lambda node: betweeness[node],
    node_label_size=lambda node: betweeness[node],
    node_size_range=[3, 20],
    edge_size_range=[1, 5],
    **shared_view_options,
).add(
    name="Page Rank",
    node_size=lambda node: page_rank[node],
    node_label_size=lambda node: page_rank[node],
    node_size_range=[3, 20],
    edge_size_range=[1, 5],
    **shared_view_options,
)